import matplotlib.pyplot as plt
import seaborn as sns
import os
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
import numpy as np
from scipy import stats
import sys
# The first command-line argument is the data directory (the CSV is read from it
# further down); it is also appended to the import search path. Stop with a
# usage hint instead of an opaque IndexError when the argument is missing.
if len(sys.argv) < 2:
    sys.exit("usage: {} DATA_DIR".format(sys.argv[0]))
sys.path.append(sys.argv[1])

import pandas as pd   
import matplotlib.pyplot as plt  

   
# Read credit_customers.csv from the directory given as the first
# command-line argument.
dataset_path = os.path.join(sys.argv[1], 'credit_customers.csv')
credit_customers = pd.read_csv(dataset_path)
  
# Overview figure on a 2x2 grid: bar charts of the category counts for
# 'credit_history' and 'employment' (top row), 20-bin histograms of 'age'
# and 'credit_amount' (bottom row).
fig, axes = plt.subplots(2, 2, figsize=(12, 8))

credit_customers['credit_history'].value_counts().plot(kind='bar', ax=axes[0, 0], title='Credit History')
credit_customers['employment'].value_counts().plot(kind='bar', ax=axes[0, 1], title='Employment')
credit_customers['age'].plot(kind='hist', bins=20, ax=axes[1, 0], title='Age')
credit_customers['credit_amount'].plot(kind='hist', bins=20, ax=axes[1, 1], title='Credit Amount')

plt.tight_layout()
# savefig does not create missing directories, so make sure the target exists.
os.makedirs('ref_result', exist_ok=True)
plt.savefig('ref_result/subplots.png')
# Release the figure once it is on disk; the script opens further figures
# later and never displays this one.
plt.close(fig)
# plt.show() 

  

import pandas as pd   
import pickle
   
# For every 'credit_history' category, compute the normalized share of each
# 'class' label, pivot the labels into columns (missing combinations become 0)
# and keep the 'good' column.
approval_rates_credit_history = (
    credit_customers
    .groupby('credit_history')['class']
    .value_counts(normalize=True)
    .unstack()
    .fillna(0)
    .loc[:, 'good']
)

print(approval_rates_credit_history)
# pickle.dump(approval_rates_credit_history,open("./ref_result/approval_rates_credit_history.pkl","wb"))

  

import pandas as pd   
import pickle
   
# Share of 'good' outcomes within each 'employment' category: normalized
# per-group label counts, pivoted to one column per label with gaps set to 0.
class_counts_by_employment = credit_customers.groupby('employment')['class']
approval_rates_employment = (
    class_counts_by_employment.value_counts(normalize=True)
    .unstack()
    .fillna(0)['good']
)

print(approval_rates_employment)
# pickle.dump(approval_rates_employment,open("./ref_result/approval_rates_employment.pkl","wb"))

  

import pandas as pd   
import pickle
   
# Per-age share of 'good' outcomes, restricted to customers whose age is
# strictly greater than 40 and strictly less than 59 (both bounds excluded).
approval_rates_middle_age = (
    credit_customers
    .loc[credit_customers['age'].gt(40) & credit_customers['age'].lt(59)]
    .groupby('age')['class']
    .value_counts(normalize=True)
    .unstack()
    .fillna(0)
    .loc[:, 'good']
)

print(approval_rates_middle_age)
# pickle.dump(approval_rates_middle_age,open("./ref_result/approval_rates_middle_age.pkl","wb"))

  

import pandas as pd   
import pickle
   
# Bucket 'credit_amount' into ten 2000-wide ranges spanning 0..20000. The
# lowest range also includes its left edge (include_lowest=True); amounts
# outside 0..20000 match no bin and get a missing range.
credit_customers['credit_amount_range'] = pd.cut(
    credit_customers['credit_amount'],
    bins=list(range(0, 20001, 2000)),
    include_lowest=True,
)

# Share of 'good' outcomes per amount range. 'credit_amount_range' is
# categorical, and grouping by it without an explicit `observed` triggers a
# FutureWarning on recent pandas because the default is changing; spelling out
# observed=False pins the long-standing behavior.
approval_rates_credit_amount = (
    credit_customers
    .groupby('credit_amount_range', observed=False)['class']
    .value_counts(normalize=True)
    .unstack()
    .fillna(0)['good']
)

print(approval_rates_credit_amount)
# pickle.dump(approval_rates_credit_amount,open("./ref_result/approval_rates_credit_amount.pkl","wb"))

  

import pandas as pd   
import matplotlib.pyplot as plt
   
# Plot the four approval-rate series computed earlier as bar charts on a
# 2x2 grid, one series per panel.
fig, axes = plt.subplots(2, 2, figsize=(12, 8))

approval_rates_credit_history.plot(kind='bar', ax=axes[0, 0], title='Approval Rates by Credit History')
approval_rates_employment.plot(kind='bar', ax=axes[0, 1], title='Approval Rates by Employment Status')
approval_rates_middle_age.plot(kind='bar', ax=axes[1, 0], title='Approval Rates for Middle-Age Group (40-59)')
approval_rates_credit_amount.plot(kind='bar', ax=axes[1, 1], title='Approval Rates by Credit Amount Range')

plt.tight_layout()
# savefig does not create missing directories, so make sure the target exists.
os.makedirs('ref_result', exist_ok=True)
plt.savefig('ref_result/bar_chart.png')
# Release the figure once it is on disk; more figures are created later.
plt.close(fig)
# plt.show()   
  

import pandas as pd   
import pickle
   
# Encode the outcome as an integer flag: 1 where 'class' equals 'good',
# 0 for every other value.
credit_customers['class_binary'] = credit_customers['class'].apply(
    lambda label: int(label == 'good')
)

print(credit_customers['class_binary'])
# pickle.dump(credit_customers['class_binary'],open("./ref_result/credit_customers_class_binary.pkl","wb"))

  

import pandas as pd   
import pickle
   
# Work on a copy so the original frame keeps its text labels, then replace the
# two categorical text columns with their integer category codes.
credit_customers_numerical = credit_customers.copy()
for column_name in ('credit_history', 'employment'):
    as_category = credit_customers_numerical[column_name].astype('category')
    credit_customers_numerical[column_name] = as_category.cat.codes

print(credit_customers_numerical)
# pickle.dump(credit_customers_numerical,open("./ref_result/credit_customers_numerical.pkl","wb"))

  

import pandas as pd   
import matplotlib.pyplot as plt   
import seaborn as sns 
   
# Scatter plot of the binary outcome against the encoded credit history.
# Only the top-left panel of the 2x2 grid is drawn on; the other three panels
# stay empty. NOTE(review): confirm the unused panels are intended.
fig, axes = plt.subplots(2, 2, figsize=(12, 8))

sns.scatterplot(data=credit_customers_numerical, x='credit_history', y='class_binary', ax=axes[0, 0])
axes[0, 0].set_title('Loan Approval Rates vs. Credit History')

plt.tight_layout()
# savefig does not create missing directories, so make sure the target exists.
os.makedirs('ref_result', exist_ok=True)
plt.savefig('ref_result/scatterplot_1.png')
# Release the figure once it is on disk; more figures are created later.
plt.close(fig)
# plt.show() 

  

import pandas as pd   
import matplotlib.pyplot as plt   
import seaborn as sns 
   
# Scatter plot of the binary outcome against the encoded employment status.
# Only the top-right panel of the 2x2 grid is drawn on; the other three panels
# stay empty. NOTE(review): confirm the unused panels are intended.
fig, axes = plt.subplots(2, 2, figsize=(12, 8))

sns.scatterplot(data=credit_customers_numerical, x='employment', y='class_binary', ax=axes[0, 1])
axes[0, 1].set_title('Loan Approval Rates vs. Employment Status')

plt.tight_layout()
# savefig does not create missing directories, so make sure the target exists.
os.makedirs('ref_result', exist_ok=True)
plt.savefig('ref_result/scatterplot_2.png')
# Release the figure once it is on disk; more figures are created later.
plt.close(fig)
# plt.show() 

  

import pandas as pd   
import matplotlib.pyplot as plt   
import seaborn as sns 
   
# Scatter plots of the binary outcome against age (bottom-left panel) and
# against credit amount (bottom-right panel). The top row of the 2x2 grid is
# left empty. NOTE(review): confirm the unused panels are intended.
fig, axes = plt.subplots(2, 2, figsize=(12, 8))

sns.scatterplot(data=credit_customers_numerical, x='age', y='class_binary', ax=axes[1, 0])
axes[1, 0].set_title('Loan Approval Rates vs. Age')

sns.scatterplot(data=credit_customers_numerical, x='credit_amount', y='class_binary', ax=axes[1, 1])
axes[1, 1].set_title('Loan Approval Rates vs. Credit Amount')

plt.tight_layout()
# savefig does not create missing directories, so make sure the target exists.
os.makedirs('ref_result', exist_ok=True)
plt.savefig('ref_result/scatterplot_3.png')
# Release the figure once it is on disk; the script does not display it.
plt.close(fig)
# plt.show() 

  

import pandas as pd   
import pickle
   
# Build three customer subsets, each selected by a single criterion, and
# print each one.

# Age strictly greater than 40 and strictly less than 59.
middle_aged_customers = credit_customers.loc[
    credit_customers['age'].gt(40) & credit_customers['age'].lt(59)
]
print(middle_aged_customers)
# pickle.dump(middle_aged_customers,open("./ref_result/middle_aged_customers.pkl","wb"))

# Employment recorded as '>=7' or '4<=X<7'.
stable_employment_customers = credit_customers.loc[
    credit_customers['employment'].isin(['>=7', '4<=X<7'])
]
print(stable_employment_customers)
# pickle.dump(stable_employment_customers,open("./ref_result/stable_employment_customers.pkl","wb"))

# Credit history recorded as one of the three "paid" labels.
good_credit_history_customers = credit_customers.loc[
    credit_customers['credit_history'].isin(
        ['existing paid', 'no credits/all paid', 'all paid']
    )
]
print(good_credit_history_customers)
# pickle.dump(good_credit_history_customers,open("./ref_result/good_credit_history_customers.pkl","wb"))

  



import pandas as pd   
import pickle
   
# Normalized share of each 'class' label within each of the three customer
# subsets built earlier.
approval_rates_middle_aged = middle_aged_customers['class'].value_counts(normalize=True)

approval_rates_stable_employment = stable_employment_customers['class'].value_counts(normalize=True)

approval_rates_good_credit_history = good_credit_history_customers['class'].value_counts(normalize=True)

# open() does not create missing directories, so make sure the target exists.
os.makedirs('./ref_result', exist_ok=True)

# Print each series and pickle it. The files are opened in `with` blocks so
# every handle is flushed and closed as soon as its dump finishes.
print(approval_rates_middle_aged)
with open("./ref_result/approval_rates_middle_aged.pkl", "wb") as pkl_file:
    pickle.dump(approval_rates_middle_aged, pkl_file)

print(approval_rates_stable_employment)
with open("./ref_result/approval_rates_stable_employment.pkl", "wb") as pkl_file:
    pickle.dump(approval_rates_stable_employment, pkl_file)

print(approval_rates_good_credit_history)
with open("./ref_result/approval_rates_good_credit_history.pkl", "wb") as pkl_file:
    pickle.dump(approval_rates_good_credit_history, pkl_file)

  